Dimensional Model Scripts
01 Table Creation
CREATE TABLE IF NOT EXISTS tr_wi_2023.zip_county_wda_xwalk
(VARCHAR(5) NOT NULL ENCODE lzo
zip VARCHAR(11) ENCODE lzo
,county SMALLINT ENCODE az64
,wda PRIMARY KEY (zip)
,
)
DISTSTYLE AUTO
;ALTER TABLE tr_wi_2023.zip_county_wda_xwalk owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.zip_county_wda_xwalk TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_rdim_week
(SMALLINT NOT NULL ENCODE az64
week_id DATE NOT NULL ENCODE az64
,week_code CHAR(6) ENCODE lzo
,quarter_code SMALLINT ENCODE az64
,calendar_year SMALLINT ENCODE az64
,calendar_quarter PRIMARY KEY (week_id)
,
)
DISTSTYLE AUTO
;ALTER TABLE tr_wi_2023.wi_rdim_week owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_week TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_rdim_week TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group db_t00111_ro;
-----
CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_industry
(VARCHAR(18) ENCODE lzo
code VARCHAR(357) ENCODE lzo
,title VARCHAR(24000) ENCODE lzo
,description
)
DISTSTYLE AUTO
;ALTER TABLE tr_wi_2023.wi_rdim_industry owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_industry TO group db_t00111_ro;
-----------------------
CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_occupation
(VARCHAR(18) ENCODE lzo
SOC_code VARCHAR(357) ENCODE lzo
,SOC_title
)
DISTSTYLE AUTO
;ALTER TABLE tr_wi_2023.wi_rdim_occupation owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_occupation TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT exists tr_wi_2023.wi_mdim_person
(VARCHAR(64) ENCODE lzo
ssn int identity not null
,person_id INT ENCODE az64
,gender VARCHAR(3) ENCODE lzo
,race VARCHAR(3) ENCODE lzo
,ethnicity DATE ENCODE az64
,birth_date primary key(person_id)
,
)
DISTSTYLE AUTO
;ALTER TABLE tr_wi_2023.wi_mdim_person owner to dbadmin11;
-- Permissions
GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO dbadmin11;
GRANT INSERT, SELECT, UPDATE, DELETE ON TABLE tr_wi_2023.wi_mdim_person TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_mdim_person TO group db_t00111_ro;
------------------------
CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_fact_weekly_observation
(int not null
person_id date
,week_ending_date smallint NOT NULL
,week_id varchar(1) not null
,benefit_claimed date
,benefit_yr_start varchar(1) not null
,normal_benefit_received date
,ic_claim_date varchar(6)
,last_employer_naics int
,last_employer int
,eligible_benefit_amount varchar(3)
,earnings_during_wk int
,entitlement
,veteran_status int4varchar(3)
,commuter varchar(3)
,education varchar(3)
,disability varchar(3)
,ic_type varchar
,occupation varchar(1)
,program_type varchar(3)
,status_code varchar(1)
,stop_payment_indicator varchar(1)
,waiting_week varchar(5)
,res_zip varchar(1) not null
,employed_in_quarter smallint
,calendar_year smallint
,calendar_quarter varchar(10)
,primary_employer_id int
,primary_employer_wages int
,total_wages SMALLINT
,employer_count PRIMARY KEY (person_id, week_id)
,
)
DISTSTYLE auto
;ALTER TABLE tr_wi_2023.wi_fact_weekly_observation owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_fact_weekly_observation TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group db_t00111_ro;
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (person_id)
REFERENCES tr_wi_2023.wi_mdim_person(person_id)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (week_id)
REFERENCES tr_wi_2023.wi_rdim_week(week_id)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (res_zip)
REFERENCES tr_wi_2023.wi_rdim_zip_county_wda_xwalk(zip)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (occupation)
REFERENCES tr_wi_2023.wi_rdim_occupation(soc_code)
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (last_employer_naics)
REFERENCES tr_wi_2023.wi_rdim_industry(code)
02 Linkage Reference Dimension Load Scripts
--populate week table
CREATE OR REPLACE PROCEDURE populating_wi_rdim_week() --need TO CREATE PROCEDURE
LANGUAGE plpgsqlas $$
declare
DATE := '2006-01-07'; -- setting START date
StartDate DATE := '2023-12-30'; -- setting END date
EndDate Date DATE := StartDate; -- setting the variable date TO increate WITH EACH loop
ID smallint :=1; -- setting id variable
begin
while Date <= EndDate loop -- starting the while loop
'this is the date %', Date; -- printing OUT what date im on
raise notice
insert into tr_wi_2023.wi_rdim_week (
week_id,
week_code,
quarter_code,
calendar_year,
calendar_quarter
)values(
ID, -- Quarter ID
Date, -- Week Code
CAST(DATE_PART(y,Date) AS CHAR(4)) + 'Q' + CAST(DATE_PART(qtr,Date) AS CHAR(1)), --Quarter_Code
Date), --Calendar_Year
DATE_PART(y,Date) --Calendar_Quarter
DATE_PART(qtr,
);Date := dateadd(w,1,Date); --increasing the date variable
ID := ID + 1; -- incresing the id variable
end loop;
'loop ended';
raise info
end;
$$;
call populating_wi_rdim_week();
-------
-- populate occupation table
--only 2-digit soc presently reported in PROMIS
select distinct substring(occupation, 1, 2) from ds_wi_dwd.promis p order by 1;
select distinct substring(occupation, 3, 8) from ds_wi_dwd.promis p;
--can join to soc_xwalk where soc_group = 'Major' without duplicating join
--when just using first two characters in soc_title from soc_xwalk
select distinct p.occupation, sx.soc_title
from ds_wi_dwd.promis p
left join ds_public_1.soc_xwalk sx
on substring(p.occupation, 1, 2) = substring(sx.soc_code, 1, 2) and sx.soc_group = 'Major'
order by 1;
--will take just first two characters from soc_code into reference table and limit xwalk to just rows where
-- soc_group = 'Major'
insert into tr_wi_2023.wi_rdim_occupation(
soc_code,
soc_title
)select
1, 2) as soc_code,
substring(soc_code,
soc_titlefrom ds_public_1.soc_xwalk sx
where sx.soc_group = 'Major'
--- industry table
insert into tr_wi_2023.wi_rdim_industry(
code,
title,
description
)select
code,
title,
descriptionfrom ds_public_1.naics_descriptions
03 Person Master
--MASTERING RULES: IF GENDER, RACE, ETHNICITY, OR BIRTH_DATE CHANGE FOR AN SSN OVER TIME
-- TAKE THE MOST COMMON VALUE. IN TIES, PRIORITIZE KNOWN VALUES OVER UNKNOWN
-- first will find number of ssn values this concerns
with mult as (
SELECT ssn
FROM ds_wi_dwd.promis p
group by ssn
having count(distinct(gender)) > 1 or count(distinct(race)) > 1 or count(distinct(ethnicity)) > 1 or count(distinct(birth_date)) > 1
)select count(distinct(ssn))
from mult
--MASTERING CODE
insert into tr_wi_2023.wi_mdim_person(
ssn
,gender
,race
,ethnicity
,birth_date
)with gender_tab as
(select ssn
, gender ROW_NUMBER() over(partition by ssn order by count(*) desc,
, --PRIORITIZE KNOWN GENDER 1 AND 2 OVER UNKNOWN 0 AND 3
case when gender = 0 then 2
when gender = 1 then 1
when gender = 2 then 1
when gender = 3 then 2
else 3 end
as RowNum
) from ds_wi_dwd.promis
group by ssn, gender
),as
race_tab
(select ssn
, race ROW_NUMBER() over(partition by ssn order by count(*) desc,
, --PRIORITIZE KNOWN RACE OVER UNKNOWN, UNKNOWN OVER NULL
case when race in (1,2,3,4,5,8) then 1
when race in (0, 6) then 2
when race is null then 3
else 4 end
as RowNum
) from ds_wi_dwd.promis
group by ssn, race
),as (
ethnicity_tab select ssn
, ethnicity ROW_NUMBER() over(partition by ssn order by count(*) desc,
, --PRIORITIZE KNOWN ETHNICITY OVER UNKNOWN, UNKNOWN OVER NULL
case when ethnicity in ('Y', 'N') then 1
when ethnicity = '*' then 2
when ethnicity is null then 3
else 4 end
as RowNum
) from ds_wi_dwd.promis
group by ssn, ethnicity
),as (
birthdate_tab select ssn
, birth_date ROW_NUMBER() over(partition by ssn order by count(*) desc,
, --PRIORITIZE KNOWN BIRTH_DATE OVER IMPOSSIBLE, NULL OVER IMPOSSIBLE
case when birth_date is null then 2
when extract(year from birth_date) > 2023 then 3
else 1 end
as RowNum
) from ds_wi_dwd.promis
group by ssn, birth_date
),as (
mastered_tab select g.ssn, g.gender, r.race, e.ethnicity, b.birth_date
from gender_tab g
inner join race_tab r on g.ssn = r.ssn and g.RowNum = 1 and r.RowNum = 1
inner join ethnicity_tab e on g.ssn = e.ssn and e.RowNum = 1
inner join birthdate_tab b on g.ssn = b.ssn and b.RowNum = 1
),--MAKE SURE WE GET EVERYONE IN WAGE RECORDS WHO DOESN'T SHOW UP IN PROMIS TOO
as (
unique_ssn_wage select distinct ssn
from ds_wi_dwd.ui_wage uw
)--JOIN EVERYONE IN PROMIS TO EVERYONE IN WAGE WHO DOESN'T SHOW UP IN PROMIS TO GET FULL SET OF PEOPLE
select
coalesce(m.ssn, u.ssn) as ssn
, m.gender, m.race
, m.ethnicity
, m.birth_datefrom mastered_tab m
full outer join unique_ssn_wage u
on m.ssn = u.ssn ;
04 Fact Table Load
insert into tr_wi_2023.wi_fact_weekly_observation(
person_id
,week_ending_date
,week_id
,benefit_claimed
,benefit_yr_start
,normal_benefit_received
,ic_claim_date
,last_employer_naics
,last_employer
,eligible_benefit_amount
,earnings_during_wk
,entitlement
,veteran_status
,commuter
,education
,disability
,ic_type
,occupation
,program_type
,status_code
,stop_payment_indicator
,waiting_week
,res_zip
,employed_in_quarter
,calendar_year
,calendar_quarter
,primary_employer_id
,primary_employer_wages
,total_wages
,employer_count
)--FOR HERE: SSN WHERE CLAUSE TO SPEED UP QUERY
--get all week/person combos
with person_quarter_combos as (
select wmp.ssn, wrw.week_code
from tr_wi_2023.wi_mdim_person wmp
cross join tr_wi_2023.wi_rdim_week wrw
--where wmp.ssn = 'REDACTED'
),as (
promis_info select
p.ssnas claim_week
,week_ending_date as benefit_yr_start
,effective_date_of_claim case
,when ic_type is null and monetarily_eligible = 'Y' and stop_payment_indicator = 'N' and waiting_week = 'N' and entitlement = 0 then 'Y'
else 'N'
end as normal_benefit_received
,ic_claim_date
,last_employer_naicsas last_employer
,ui_number as eligible_benefit_amount
,weekly_benefit_amount
,earnings_during_wk
,entitlement
,veteran_status
,commuter
,education
,disability
,ic_type 1, 2) as occupation
,substring(occupation,
,program_type
,status_code
,stop_payment_indicator
,waiting_week
,res_zip from ds_wi_dwd.promis p
),--FOR NOW: IGNORE ALL WAGE RECORDS WHERE UI ACCOUNT NUMBER ISN'T ALL INTEGERS
AS (
Wage_Rank SELECT
w.ssn,year,
w.
w.quarter,ROW_NUMBER() OVER(PARTITION BY w.ssn, w.year, w.quarter ORDER BY w.wage DESC) AS RANK,
w.wage,
w.ui_accountFROM ds_wi_dwd.ui_wage w
where w.wage > 0 and substring(w.ui_account,1,1) != 'M'
),AS (
Primary_Employer_Wage select
WR.ssn,year,
WR.
WR.quarter,AS Primary_Employer_Wages,
WR.wage as Primary_Employer_ID
WR.ui_account from Wage_Rank WR
WHERE
RANK=1
WR.
),AS (
All_Employer_Wage SELECT
WR.ssn,year,
WR.
WR.quarter,COUNT(WR.ui_account) AS Employer_Count,
SUM(WR.wage) AS Total_Wages
FROM
Wage_Rank WRGROUP BY
WR.ssn,year,
WR.
WR.quarter
)select person.person_id
as week_ending_date
,pq.week_code
,week.week_idcase when pi.claim_week is null then 'N' else 'Y' end as benefit_claimed
,
,pi.benefit_yr_startcase when pi.normal_benefit_received is null then 'N' else pi.normal_benefit_received end as normal_benefit_received
,
,pi.ic_claim_date
,pi.last_employer_naics
,pi.last_employer
,pi.eligible_benefit_amount
,pi.earnings_during_wk
,pi.entitlement
,pi.veteran_status
,pi.commuter
,pi.education
,pi.disability
,pi.ic_type
,pi.occupation
,pi.program_type
,pi.status_code
,pi.stop_payment_indicator
,pi.waiting_week
,pi.res_zipCASE WHEN PEW.ssn IS NULL THEN 'N' ELSE 'Y' END AS employed_in_quarter
,
,week.calendar_year
,week.calendar_quarteras primary_empoyer_id
,PEW.Primary_Employer_ID as primary_employer_wages
,PEW.Primary_Employer_Wages as total_wages
,AEW.Total_Wages as employer_count
,AEW.Employer_Count from person_quarter_combos pq
join tr_wi_2023.wi_mdim_person person on (pq.ssn = person.ssn)
join tr_wi_2023.wi_rdim_week week on (pq.week_code = week.week_code)
left join promis_info pi on (pq.ssn = pi.ssn) and (pq.week_code = pi.claim_week)
LEFT JOIN Primary_Employer_Wage PEW ON (PEW.ssn=pq.ssn) AND (PEW.quarter=week.calendar_quarter) and (PEW.year = week.calendar_year)
LEFT JOIN All_Employer_Wage AEW ON (AEW.ssn=pq.ssn) AND (AEW.quarter=week.calendar_quarter) and (AEW.year = week.calendar_year)
order by person.person_id, pq.week_code;